/* smog_pollution_analysis.do: This script runs the main regression analyses presented in the paper
for NOx, CO, and Ozone, resulting in Tables 2, 3, and 4, Figure 3, and appendix figure A6*/

capture log close

clear all

set more off
set matsize 2000

cd ${path}analysis

log using smog_pollution_analysis.log, replace


use pollution_star_merged

xtset county date

/*First, divide the counts of inspections by 1000, to make the coefficients easier to read*/

gen lag = 0
foreach var of varlist  *pass* firstgen* othergen* {
	qui replace `var' = (`var')/1000
}


/* Generate day-of-week and calendar week*/

gen dow = dow(date)
gen week = wofd(date)

/*Create some index variables for storing the "bin" coefficients"*/

gen newcars = _n>20 in 1/40
gen bin = _n-1 -20*(_n>20) in 1/40

gen date2 = date^2
gen date3 = date^3

/*Declare the "if" conditions in a local, as well as the full set of controls*/

local conditions  if year>=1998 & inlist(county,5,12,18,22,26,53)==0 & (inlist(county,3,6,11,12,13,17,23,47,58)==0 | (year !=2002&year!=2003)) 
local controls tMin tMax prec ib1.county  ib1.county#i.year i.week 


/* Convenience estadd subcommand for adding F tests*/
capture program drop estadd_oldnewtest
program estadd_oldnewtest, eclass
	test firstgen_90 = othergen_90
	ereturn scalar oldnew_F = r(F)
	ereturn scalar oldnew_p = r(p)
end



/*Now run regressions for each pollutant*/

foreach pollutant in nox ozone co {

	/*Multiply the pollutant variables by 1000 to change to ppb (again, to make coefficients readable*/

	replace `pollutant' = `pollutant'*1000
	
	/*First set of regressions: Using all re-inspections, build up controls */
	
	local counter = 1
	
	eststo `pollutant'_buildup`counter++': regress `pollutant' firstgen_90 othergen_90 `conditions', vce(cluster county)
	eststo `pollutant'_buildup`counter++': regress `pollutant' firstgen_90 othergen_90 ib1.county i.week `conditions', vce(cluster county) 
	eststo `pollutant'_buildup`counter++': regress `pollutant' firstgen_90 othergen_90  ib1.county i.week ib1.county#i.year `conditions', vce(cluster county) 
	eststo `pollutant'_buildup`counter++': regress `pollutant' firstgen_90 othergen_90 `controls'  `conditions', vce(cluster county) 
	eststo `pollutant'_base
	
	
	/* Store tests of equality between old and new cars*/
	
	estadd oldnewtest: `pollutant'_buildup* `pollutant'_base
	

	/* Regressions interacting re-inspections with county-average station quality*/
	eststo `pollutant'_fpr_interaction_base: regress `pollutant' c.fpr_current_firstgen_90#c.firstgen_90 firstgen_90 c.fpr_current_othergen_90#c.othergen_90 othergen_90 `controls'  `conditions', vce(cluster county) 

	/*Calculate and add marginal effects at the mean of station quality*/
	margins, dydx(firstgen othergen) at((mean) fpr_current_firstgen_90) noestimcheck post

	foreach gen in firstgen othergen{
		local `gen'_ame = _b[`gen'_90]
		local `gen'_se=_se[`gen'_90]
		estadd scalar `gen'_ame=  ``gen'_ame' : `pollutant'_fpr_interaction_base
		estadd scalar `gen'_se= ``gen'_se' : `pollutant'_fpr_interaction_base
		global `pollutant'_`gen'_star = cond(abs(_b[`gen'_90]/_se[`gen'_90]) < invt(e(N),.95),"",cond(abs(_b[`gen'_90]/_se[`gen'_90]) < invt(e(N),.975),"*",cond(abs(_b[`gen'_90]/_se[`gen'_90]) < invt(e(N),.995),"**","***")))
	
	}

	test firstgen = othergen
	estadd scalar oldnew_F = r(F) : `pollutant'_fpr_interaction_base
	estadd scalar oldnew_p = r(p) : `pollutant'_fpr_interaction_base


	/* Now re-estimate the model with re-inspections "binned" by FPR score*/
	
	eststo `pollutant'_bin: regress `pollutant' firstgen_fprbin*_90 othergen_fprbin*_90 `absorbvars' `controls'  `conditions', vce(cluster county) 
	
	gen `pollutant'_fprbin_coef=.
	gen `pollutant'_fprbin_ub = .
	gen `pollutant'_fprbin_lb = .
	local row = 1
	forvalues i = 0/19{
		replace `pollutant'_fprbin_coef = _b[firstgen_fprbin`i'_90] if newcar == 0 & bin == `i'
		replace `pollutant'_fprbin_coef = _b[othergen_fprbin`i'_90] if newcar == 1 & bin == `i'
		
		replace `pollutant'_fprbin_ub =_b[firstgen_fprbin`i'_90]+ _se[firstgen_fprbin`i'_90] * invnormal(1-.025/20) if newcar == 0 & bin == `i'
		replace `pollutant'_fprbin_lb =_b[firstgen_fprbin`i'_90]- _se[firstgen_fprbin`i'_90] * invnormal(1-.025/20) if newcar == 0 & bin == `i'
		
		replace `pollutant'_fprbin_ub = _b[othergen_fprbin`i'_90] + _se[othergen_fprbin`i'_90] * invnormal(1-.025/20) if newcar == 1 & bin == `i'
		replace `pollutant'_fprbin_lb = _b[othergen_fprbin`i'_90] - _se[othergen_fprbin`i'_90] * invnormal(1-.025/20) if newcar == 1 & bin == `i'
	}
	

}

/* Now do ozone again, controlling for NOx levels*/

eststo ozone_nox: regress ozone c.firstgen_90 c.othergen_90 c.nox##c.nox `controls' `conditions', vce(cluster county)  

estadd oldnewtest

eststo ozone_nox_inter: regress ozone c.fpr_current_firstgen_90#c.firstgen_90 firstgen_90 c.fpr_current_othergen_90#c.othergen_90 othergen_90 c.nox##c.nox `controls'  `conditions', vce(cluster county)  

	margins, dydx(firstgen othergen) at((mean) fpr_current_firstgen_90) noestimcheck  post

	foreach gen in firstgen othergen{
		local ame = _b[`gen'_90]
		local ame_se=_se[`gen'_90]
		estadd scalar `gen'_ame = `ame' : ozone_nox_inter
		estadd scalar `gen'_se = `ame_se' : ozone_nox_inter
		global ozone_`gen'_star = cond(abs(_b[`gen'_90]/_se[`gen'_90]) < invt(e(N),.95),"",cond(abs(_b[`gen'_90]/_se[`gen'_90]) < invt(e(N),.975),"*",cond(abs(_b[`gen'_90]/_se[`gen'_90]) < invt(e(N),.995),"**","***")))
	}

	test firstgen = othergen
	estadd scalar oldnew_F = r(F) : ozone_nox_inter
	estadd scalar oldnew_p = r(p) : ozone_nox_inter

	regress ozone firstgen_fprbin*_90 othergen_fprbin*_90 c.nox##c.nox `controls'  `conditions', vce(cluster county)  
	
	forvalues i = 0/19{
		replace ozone_fprbin_coef = _b[firstgen_fprbin`i'_90] if newcar == 0 & bin == `i'
		replace ozone_fprbin_coef = _b[othergen_fprbin`i'_90] if newcar == 1 & bin == `i'

	}



/* Table 2 : Build-up, with CO in panel 1 and NOx in panel 2*/
#delimit ;

esttab co_buildup* using buildup.tex, replace se booktabs label nomtitles star( * .1 ** .05 *** .01)
indicate("County FE = *.county"  "Calendar Week FE = *.week" 
/*"County Linear Trends = *y#c.date" "County Quadratic Trends = *y#c.date#c.date" "County Cubic Trends = *y#c.date#c.date#c.date"*/ "County-Year FE = *.year" "Weather Controls = tMin tMax prec") 
title(Re-Inspections and County-Level Daily Air Quality\label{tab:buildup})
scalars("oldnew_F F-test of Equality" "oldnew_p \$ P > F\$")
varlabels(firstgen_90 "000s of Re-Inspections Last 90 Days\\ \quad\quad 1975-1985 Vehicles"  othergen_90 "\quad\quad 1985+ Vehicles" fail_count_90 "000s of failures last 90 days")
postfoot( ) nocons  mgroup("A: Outcome is Carbon Monoxide (PPB)", pattern(1 0 0 0 0) span prefix(\multicolumn{@span}{c}{) suffix(}))
substitute( "\begin{tabular}" "\setlength{\linewidth}{.1cm}\newcommand{\contents}{\begin{tabular}"
            "\end{tabular}" "\end{tabular}}\setbox0=\hbox{\contents}\setlength{\linewidth}{\wd0-2\tabcolsep-.25em}\contents"
            "{l}{\footnotesize" "{p{\linewidth}}{\footnotesize")
;

esttab nox_buildup* using buildup.tex, append se booktabs label nomtitles  star( * .1 ** .05 *** .01)
indicate("County FE = *.county"  "Calendar Week FE = *.week" 
/*"County Linear Trends = *y#c.date" "County Quadratic Trends = *y#c.date#c.date" "County Cubic Trends = *y#c.date#c.date#c.date"*/ "County-Year FE = *.year"  "Weather Controls = tMin tMax prec"  ) 
mgroup("B: Outcome is \nox\ (PPB)", pattern(1 0 0 0 0) span prefix(\multicolumn{@span}{c}{) suffix(}))
scalars("oldnew_F F-test of Equality" "oldnew_p \$ P > F\$")
varlabels(firstgen_90 "000s of Re-Inspections Last 90 Days\\ \quad\quad 1975-1985 Vehicles"  othergen_90 "\quad\quad 1985+ Vehicles" fail_count_90 "000s of failures last 90 days")
prehead(\midrule )  nocons postfoot(\bottomrule @starlegend \\ \multicolumn{@span}{l}{\footnotesize @note} \end{tabular}\end{table})
note(Note: Observations are county-days.  Standard errors clustered by county reported in parentheses.  F-tests of equality report the F-statistics from testing the null hypothesis that the coefficient on 1975--1985 model year vehicles equals the coefficient on 1985+ model year vehicles. )
substitute( "\begin{tabular}" "\setlength{\linewidth}{.1cm}\newcommand{\contents}{\begin{tabular}"
            "\end{tabular}" "\end{tabular}}\setbox0=\hbox{\contents}\setlength{\linewidth}{\wd0-2\tabcolsep-.25em}\contents"
            "{l}{\footnotesize" "{p{\linewidth}}{\footnotesize")
;





/* Table 3: Interactions with average FPR score*/

#delimit ;

esttab co_base  co_fpr_interaction_base  using fpr_interaction.tex, replace se booktabs label nomtitles  star( * .1 ** .05 *** .01)
drop(tMin tMax prec *.county *.week *date _cons *othergen* *year, relax ) 
stats(firstgen_ame firstgen_se, label("\emph{Effect at Mean Station Quality}" " ") layout(\emph{@}\sym{${co_firstgen_star}} (\emph{@})))
title(Station Quality and County-Level Air Pollution\label{tab:fpr-interaction})
varlabels(firstgen_90 "000s of Re-Inspections Last 90 Days\\ \quad\quad 1975-1985 Vehicles"  othergen_90 "\quad\quad 1985+ Vehicles"
c.fpr_current_firstgen_90#c.firstgen_90 "\quad\quad 1975-1985 Vehicles \$\cdot\$ Quality" 
c.fpr_current_othergen_90#c.othergen_90 "\quad\quad 1985+ Vehicles \$\cdot\$ Quality")
order(firstgen_90 c.fpr_current_firstgen_90#c.firstgen_90 ) 
postfoot( ) prefoot( ) nocons noobs mgroup("A: Outcome is CO (PPB)", pattern(1 0 0 0 0) span prefix(\multicolumn{@span}{c}{) suffix(}))
note(Note: Observations are county-days.  All regressions control for daily weather, county fixed effects, calendar week fixed effects, and county-year fixed effects. Standard errors clusted by county reported in parentheses.)
substitute( "\begin{tabular}" "\setlength{\linewidth}{.1cm}\newcommand{\contents}{\begin{tabular}"
            "\end{tabular}" "\end{tabular}}\setbox0=\hbox{\contents}\setlength{\linewidth}{\wd0-2\tabcolsep-.25em}\contents"
            "{l}{\footnotesize" "{p{\linewidth}}{\footnotesize")
;

esttab co_base  co_fpr_interaction_base  using fpr_interaction.tex, append se booktabs label nomtitles star( * .1 ** .05 *** .01)
drop(tMin tMax prec *.county *.week *date _cons *firstgen* *year, relax ) 
stats(othergen_ame othergen_se oldnew_F oldnew_p, label("\emph{Effect at Mean Station Quality}" " " "F-test of Equality" "\$P > F \$") layout(\emph{@}\sym{${co_othergen_star}}  (\emph{@}) @ @))
varlabels(firstgen_90 "000s of Re-Inspections Last 90 Days\\ \quad\quad 1975-1985 Vehicles"  othergen_90 "\quad\quad 1985+ Vehicles"
c.fpr_current_firstgen_90#c.firstgen_90 "\quad\quad 1975-1985 Vehicles \$\cdot\$ Station Quality" 
c.fpr_current_othergen_90#c.othergen_90 "\quad\quad 1985+ Vehicles \$\cdot\$ Station Quality")
order(othergen_90 c.fpr_current_othergen_90#c.othergen_90)
postfoot( ) prefoot( ) prehead( ) posthead( ) nocons noobs nonum
note(Note: Observations are county-days.  All regressions control for daily weather, county fixed effects, calendar week fixed effects, and county-year fixed effects. Standard errors clusted by county reported in parentheses.)
substitute( "\begin{tabular}" "\setlength{\linewidth}{.1cm}\newcommand{\contents}{\begin{tabular}"
            "\end{tabular}" "\end{tabular}}\setbox0=\hbox{\contents}\setlength{\linewidth}{\wd0-2\tabcolsep-.25em}\contents"
            "{l}{\footnotesize" "{p{\linewidth}}{\footnotesize")
;


esttab nox_base  nox_fpr_interaction_base using fpr_interaction.tex, append se booktabs label nomtitles  star( * .1 ** .05 *** .01)
drop(tMin tMax prec *.county *.week *date _cons *othergen* *year, relax )  
stats(firstgen_ame firstgen_se, label("\emph{Effect at Mean Station Quality}" " ") layout(\emph{@}\sym{${nox_firstgen_star}} (\emph{@})))
mgroup("B: Outcome is \nox\ (PPB)", pattern(1 0 0 0 0) span prefix(\multicolumn{@span}{c}{) suffix(}))
varlabels(firstgen_90 "000s of Re-Inspections Last 90 Days\\ \quad\quad 1975-1985 Vehicles"  othergen_90 "\quad\quad 1985+ Vehicles"
c.fpr_current_firstgen_90#c.firstgen_90 "\quad\quad 1975-1985 Vehicles \$\cdot\$ Station Quality" 
c.fpr_current_othergen_90#c.othergen_90 "\quad\quad 1985+ Vehicles \$\cdot\$ Station Quality")
order(firstgen_90 c.fpr_current_firstgen_90#c.firstgen_90  )
prehead(\midrule )  nocons noobs postfoot( ) prefoot( )
note(Note: Observations are county-days.  All regressions control for daily weather, county fixed effects, calendar week fixed effects, and county-year fixed effects. Standard errors clusted by county reported in parentheses.)
substitute( "\begin{tabular}" "\setlength{\linewidth}{.1cm}\newcommand{\contents}{\begin{tabular}"
            "\end{tabular}" "\end{tabular}}\setbox0=\hbox{\contents}\setlength{\linewidth}{\wd0-2\tabcolsep-.25em}\contents"
            "{l}{\footnotesize" "{p{\linewidth}}{\footnotesize")
;

esttab nox_base  nox_fpr_interaction_base  using fpr_interaction.tex, append se booktabs label nomtitles  star( * .1 ** .05 *** .01)
drop(tMin tMax prec *.county *.week *date _cons *firstgen* *year, relax )  
stats(othergen_ame othergen_se oldnew_F oldnew_p, label("\emph{Effect at Mean Station Quality}" " " "F-test of Equality" "\$P > F \$") layout(\emph{@}\sym{${nox_othergen_star}}  (\emph{@}) @ @))
varlabels(firstgen_90 "000s of Re-Inspections Last 90 Days\\ \quad\quad 1975-1985 Vehicles"  othergen_90 "\quad\quad 1985+ Vehicles"
c.fpr_current_firstgen_90#c.firstgen_90 "\quad\quad 1975-1985 Vehicles \$\cdot\$ Station Quality" 
c.fpr_current_othergen_90#c.othergen_90 "\quad\quad 1985+ Vehicles \$\cdot\$ Station Quality")
order(  othergen_90 c.fpr_current_othergen_90#c.othergen_90)
prehead( ) posthead( ) prefoot( ) nonum  nocons postfoot(\bottomrule @starlegend \\ \multicolumn{@span}{l}{\footnotesize @note} \end{tabular}\end{table})
note(Note: Observations are county-days.  All regressions control for daily weather, county fixed effects, calendar week fixed effects, and county-year fixed effects. Standard errors clustered by county reported in parentheses. F-tests of equality report the F-statistic from testing the null hypothesis that either the coefficient (column 1) or the average marginal effect (column 2) of 1975--1985 model year vehicles equals that of 1985+ model year vehicles.)
substitute( "\begin{tabular}" "\setlength{\linewidth}{.1cm}\newcommand{\contents}{\begin{tabular}"
            "\end{tabular}" "\end{tabular}}\setbox0=\hbox{\contents}\setlength{\linewidth}{\wd0-2\tabcolsep-.25em}\contents"
            "{l}{\footnotesize" "{p{\linewidth}}{\footnotesize")
;

/* Table 4: Ozone*/


#delimit ;

esttab ozone_base  ozone_fpr_interaction_base ozone_nox ozone_nox_inter using ozone.tex, replace se booktabs label nomtitles  star( * .1 ** .05 *** .01)
drop(tMin tMax prec *.county *.week *date _cons *othergen* *nox* *year, relax ) 
stats(firstgen_ame firstgen_se, label("\emph{Effect at Mean Station Quality}" " ") layout(\emph{@}\sym{${ozone_firstgen_star}} (\emph{@})))
title(Station Quality and County-Level Ozone Pollution\label{tab:ozone})
varlabels(firstgen_90 "000s of Re-Inspections Last 90 Days\\ \quad\quad 1975-1985 Vehicles"  othergen_90 "\quad\quad 1985+ Vehicles"
c.fpr_current_firstgen_90#c.firstgen_90 "\quad\quad 1975-1985 Vehicles \$\cdot\$ Station Quality" 
c.fpr_current_othergen_90#c.othergen_90 "\quad\quad 1985+ Vehicles \$\cdot\$ Station Quality")
order(firstgen_90 c.fpr_current_firstgen_90#c.firstgen_90 ) 
postfoot( ) prefoot( ) nocons noobs mgroup("Base" "\nox\ Controls", pattern(1 0 1 0) erepeat(\cmidrule(lr){@span}) span prefix(\multicolumn{@span}{c}{) suffix(}))
note(Note: Observations are county-days.  All regressions control for daily weather, county fixed effects, calendar week fixed effects, and county-year fixed effects. Standard errors clusted by county reported in parentheses.)
substitute( "\begin{tabular}" "\setlength{\linewidth}{.1cm}\newcommand{\contents}{\begin{tabular}"
            "\end{tabular}" "\end{tabular}}\setbox0=\hbox{\contents}\setlength{\linewidth}{\wd0-2\tabcolsep-.25em}\contents"
            "{l}{\footnotesize" "{p{\linewidth}}{\footnotesize")

;

esttab ozone_base  ozone_fpr_interaction_base ozone_nox ozone_nox_inter  using ozone.tex, append se booktabs label nomtitles  star( * .1 ** .05 *** .01)
drop(tMin tMax prec *.county *.week *date _cons *firstgen* *nox* *year, relax )  
stats(othergen_ame othergen_se oldnew_F oldnew_p, label("\emph{Effect at Mean Station Quality}" " " "F-test of Equality" "\$P > F \$") layout(\emph{@}\sym{${ozone_othergen_star}}  (\emph{@}) @ @))
varlabels(firstgen_90 "000s of Re-Inspections Last 90 Days\\ \quad\quad 1975-1985 Vehicles"  othergen_90 "\quad\quad 1985+ Vehicles"
c.fpr_current_firstgen_90#c.firstgen_90 "\quad\quad 1975-1985 Vehicles \$\cdot\$ Station Quality" 
c.fpr_current_othergen_90#c.othergen_90 "\quad\quad 1985+ Vehicles \$\cdot\$ Station Quality")
order(  othergen_90 c.fpr_current_othergen_90#c.othergen_90)
prehead( ) posthead( ) prefoot( ) nonum  nocons postfoot(\bottomrule @starlegend \\ \multicolumn{@span}{l}{\footnotesize @note} \end{tabular}\end{table})
note(Note: Observations are county-days.  All regressions control for daily weather, county fixed effects, calendar week fixed effects, and county-year fixed effects.  Columns (3) and (4) control for a quadratic in the level of \nox. Standard errors clusted by county reported in parentheses. F-tests of equality report the F-statistic from testing the null hypothesis that either the coefficient (columns 1 and 3) or the average marginal effect (columns 2 and 4) of 1975--1985 model year vehicles equals that of 1985+ model year vehicles.)
substitute( "\begin{tabular}" "\setlength{\linewidth}{.1cm}\newcommand{\contents}{\begin{tabular}"
            "\end{tabular}" "\end{tabular}}\setbox0=\hbox{\contents}\setlength{\linewidth}{\wd0-2\tabcolsep-.25em}\contents"
            "{l}{\footnotesize" "{p{\linewidth}}{\footnotesize")
;



/*Figure 3: Re-inspections interacted with FPR bins*/
#delimit ;
replace bin = bin*.05;
set scheme s2color;

graph twoway scatter nox_fprbin_coef bin if newcar == 0 , msize(small) || scatter nox_fprbin_coef bin if newcar == 1, msize(small) msymbol(S)||
lowess nox_fprbin_coef bin if newcar == 0 || lowess nox_fprbin_coef bin if newcar == 1, lpattern(dash)||,
legend(order(- "1975-1985 Vehicles" - "1985+ Vehicles" 1 2 3 4) label(1 "Coefficients") label(2 "Coefficients") label(3 "Lowess Best Fit") label(4 "Lowess Best Fit")) 
ytitle(Change in NOX (PPB) per 1000 Re-inspections)
xtitle(Bottom of Station Quality Bin) graphregion(color(white)) title(NOx) name(nox, replace)
;

#delimit ;
graph twoway scatter co_fprbin_coef bin if newcar == 0 , msize(small) || scatter co_fprbin_coef bin if newcar == 1, msize(small) msymbol(S)||
lowess co_fprbin_coef bin if newcar == 0 || lowess co_fprbin_coef bin if newcar == 1, lpattern(dash)||,
legend(order(- "1975-1985 Vehicles" - "1985+ Vehicles" 1 2 3 4) label(1 "Coefficients") label(2 "Coefficients") label(3 "Lowess Best Fit") label(4 "Lowess Best Fit")) 
ytitle(Change in CO (PPB) per 1000 Re-inspections)
xtitle(Bottom of Station Quality Bin) graphregion(color(white)) title(CO) name(co, replace)
;

graph combine co nox, graphregion(color(white)) col(1) ysize(8) ;

graph export fpr_interaction_bin.pdf, replace; 



/*Figure XX: Re-inspections interacted with FPR bins for Ozone (broken out by low/high NOx days*/

#delimit ;
graph twoway scatter ozone_fprbin_coef bin if newcar == 0 , msize(small) || scatter ozone_fprbin_coef bin if newcar == 1, msize(small) msymbol(S)||
lowess ozone_fprbin_coef bin if newcar == 0 || lowess ozone_fprbin_coef bin if newcar == 1, lpattern(dash)||,
legend(order(- "1975-1985 Vehicles" - "1985+ Vehicles" 1 2 3 4) label(1 "Coefficients") label(2 "Coefficients") label(3 "Lowess Best Fit") label(4 "Lowess Best Fit")) 
ytitle(Change in Ozone (PPB) per 1000 Re-inspections)
xtitle(Bottom of Station Quality Bin) graphregion(color(white)) title(Ozone) name(ozone, replace)
;


graph export ozone_bin.pdf, replace;

#delimit cr



